Please install the required Python modules/SDKs¶

In [1]:
! activate ai-azure-c1

import sys

sys.path.append("/opt/conda/envs/ai-azure-c1/lib/python3.8/site-packages")

Importing Azure Form Recognizer Python modules¶

In [2]:
import os
import io
import glob
import time
import sys
import uuid
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageDraw
from io import BytesIO

from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient, FormTrainingClient
from azure.core.credentials import AzureKeyCredential
from azure.cognitiveservices.vision.face import FaceClient
from azure.cognitiveservices.vision.face.models import TrainingStatusType
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from azure.storage.blob import BlobServiceClient
from msrest.authentication import CognitiveServicesCredentials, ApiKeyCredentials

from video_indexer import VideoIndexer
from urllib.parse import urlparse
from matplotlib.pyplot import imshow
In [ ]:
 

Step 3¶

Upload Video¶

In [3]:
video_analysis = VideoIndexer(
    vi_subscription_key='',
    vi_location="trial",
    vi_account_id=""
)
In [4]:
video_analysis.check_access_token()
Getting video indexer access token...
Access Token: eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJWZXJzaW9uIjoiMi4wLjAuMCIsIktleVZlcnNpb24iOiI5NGQyMWY0ZDZlY2Y0MzRmOGUzYmRhMDVjMWU3MmRhZCIsIkFjY291bnRJZCI6ImJlYmJiOGFmLTIyMTQtNGU0Yy1iNjczLTRiN2Q2MDVjYmMyYiIsIkFjY291bnRUeXBlIjoiVHJpYWwiLCJQZXJtaXNzaW9uIjoiQ29udHJpYnV0b3IiLCJFeHRlcm5hbFVzZXJJZCI6IjRBMjhFM0NFQUU4ODQyMDk5RDQyRTZGRThCREIxRjA4IiwiVXNlclR5cGUiOiJNaWNyb3NvZnRDb3JwQWFkIiwiSXNzdWVyTG9jYXRpb24iOiJUcmlhbCIsIm5iZiI6MTY5ODYzMzI2MSwiZXhwIjoxNjk4NjM3MTYxLCJpc3MiOiJodHRwczovL2FwaS52aWRlb2luZGV4ZXIuYWkvIiwiYXVkIjoiaHR0cHM6Ly9hcGkudmlkZW9pbmRleGVyLmFpLyJ9.sMvAc_llz761ge5fSSzZTvY8ifV9SHnTb8IEOFH9G0j-Gano4LP-6NP3nZ3t5Bw_RVSzatHXgEKAjVCphq_87X29CMdX-90ziwoNudchIVFF5KNOetg407-MNYbnDPtnoMRL8VK1nNffyKtN4Tl5j_HBvdjOsD_2C-YhcxD4IU65IRsa5-sdAr5HT6zGeZSjTt0JZmG8lq1ATJiAHQXTDG6J7V6lvv0Cww07ND_VASlEZHZs_1HM2Tmy-_a8YJ0-aNpmy2VqeTqZYr_8gPKm7_7gnp1dFps-2KFmBAd6aFFaWAUf-Mj-2FjOZYxg7aCavAi8zjDJFlGP9WShFxMlOA
In [5]:
video_id = '74e2ce28ab'
In [6]:
video_analysis.get_video_info(video_id)
Getting video info for: 74e2ce28ab
Out[6]:
{'partition': None,
 'description': None,
 'privacyMode': 'Private',
 'state': 'Processed',
 'accountId': 'bebbb8af-2214-4e4c-b673-4b7d605cbc2b',
 'id': '74e2ce28ab',
 'name': 'ca-dl-avkash-chauhan',
 'userName': 'Richard Helton',
 'created': '2023-10-29T08:15:02.58+00:00',
 'isOwned': True,
 'isEditable': True,
 'isBase': True,
 'durationInSeconds': 26,
 'duration': '0:00:26.499856',
 'summarizedInsights': {'name': 'ca-dl-avkash-chauhan',
  'id': '74e2ce28ab',
  'privacyMode': 'Private',
  'duration': {'time': '0:00:26.499856', 'seconds': 26.5},
  'thumbnailVideoId': '74e2ce28ab',
  'thumbnailId': '0db877fa-54fa-452b-bc3f-1072548ff26e',
  'faces': [{'videoId': '74e2ce28ab',
    'confidence': 0,
    'description': None,
    'title': None,
    'thumbnailId': 'e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae',
    'seenDuration': 26.5,
    'seenDurationRatio': 1,
    'id': 1000,
    'name': 'Unknown #1',
    'appearances': [{'startTime': '0:00:00',
      'endTime': '0:00:26.4998556',
      'startSeconds': 0,
      'endSeconds': 26.5}]}],
  'keywords': [{'isTranscript': True,
    'id': 1,
    'name': 'boarding pass',
    'appearances': [{'startTime': '0:00:12.4',
      'endTime': '0:00:13.44',
      'startSeconds': 12.4,
      'endSeconds': 13.4},
     {'startTime': '0:00:20.88',
      'endTime': '0:00:22.24',
      'startSeconds': 20.9,
      'endSeconds': 22.2}]}],
  'sentiments': [{'sentimentKey': 'Neutral',
    'seenDurationRatio': 0.8226,
    'appearances': [{'startTime': '0:00:00',
      'endTime': '0:00:07.48',
      'startSeconds': 0,
      'endSeconds': 7.5},
     {'startTime': '0:00:12.16',
      'endTime': '0:00:26.499856',
      'startSeconds': 12.2,
      'endSeconds': 26.5}]},
   {'sentimentKey': 'Negative',
    'seenDurationRatio': 0.1774,
    'appearances': [{'startTime': '0:00:07.48',
      'endTime': '0:00:12.16',
      'startSeconds': 7.5,
      'endSeconds': 12.2}]}],
  'emotions': [{'type': 'Anger',
    'seenDurationRatio': 0.1774,
    'appearances': [{'startTime': '0:00:07.48',
      'endTime': '0:00:12.16',
      'startSeconds': 7.5,
      'endSeconds': 12.2}]}],
  'audioEffects': [{'audioEffectKey': 'Silence',
    'seenDurationRatio': 0.0943,
    'seenDuration': 2.5,
    'appearances': [{'confidence': 0.8294,
      'startTime': '0:00:00',
      'endTime': '0:00:01.5',
      'startSeconds': 0,
      'endSeconds': 1.5},
     {'confidence': 0.7832,
      'startTime': '0:00:18.58',
      'endTime': '0:00:19.58',
      'startSeconds': 18.6,
      'endSeconds': 19.6}]}],
  'labels': [{'id': 1,
    'name': 'wall',
    'appearances': [{'confidence': 0.9923,
      'startTime': '0:00:00',
      'endTime': '0:00:26.1660778',
      'startSeconds': 0,
      'endSeconds': 26.2}]},
   {'id': 2,
    'name': 'human face',
    'appearances': [{'confidence': 0.9927,
      'startTime': '0:00:00',
      'endTime': '0:00:26.1660778',
      'startSeconds': 0,
      'endSeconds': 26.2}]},
   {'id': 3,
    'name': 'person',
    'appearances': [{'confidence': 0.9906,
      'startTime': '0:00:00',
      'endTime': '0:00:26.1660778',
      'startSeconds': 0,
      'endSeconds': 26.2}]},
   {'id': 4,
    'name': 'indoor',
    'appearances': [{'confidence': 0.9875,
      'startTime': '0:00:00',
      'endTime': '0:00:26.1660778',
      'startSeconds': 0,
      'endSeconds': 26.2}]}],
  'framePatterns': [],
  'brands': [],
  'namedLocations': [{'referenceId': None,
    'referenceUrl': None,
    'confidence': 0.99,
    'description': None,
    'seenDuration': 3,
    'id': 1,
    'name': 'kiosk',
    'appearances': [{'startTime': '0:00:02.52',
      'endTime': '0:00:05.52',
      'startSeconds': 2.5,
      'endSeconds': 5.5}]}],
  'namedPeople': [],
  'statistics': {'correspondenceCount': 9,
   'speakerTalkToListenRatio': {'1': 0.099,
    '2': 0.097,
    '3': 0.203,
    '4': 0.493,
    '5': 0.105},
   'speakerLongestMonolog': {'1': 1, '2': 1, '3': 2, '4': 4, '5': 1},
   'speakerNumberOfFragments': {'1': 2, '2': 2, '3': 2, '4': 3, '5': 2},
   'speakerWordCount': {'1': 7, '2': 5, '3': 7, '4': 22, '5': 4}},
  'topics': [{'referenceUrl': None,
    'iptcName': 'science and technology',
    'iabName': 'Education',
    'confidence': 0.7411,
    'id': 1,
    'name': 'Education/Technology',
    'appearances': [{'startTime': '0:00:00.68',
      'endTime': '0:00:24.68',
      'startSeconds': 0.7,
      'endSeconds': 24.7}]},
   {'referenceUrl': None,
    'iptcName': 'lifestyle and leisure/leisure/travel and tourism/tourism',
    'iabName': 'Travel',
    'confidence': 0.6612,
    'id': 2,
    'name': 'Tourism',
    'appearances': [{'startTime': '0:00:00.68',
      'endTime': '0:00:24.68',
      'startSeconds': 0.7,
      'endSeconds': 24.7}]},
   {'referenceUrl': None,
    'iptcName': 'lifestyle and leisure/leisure/travel and tourism/tourism',
    'iabName': 'Travel',
    'confidence': 0.5613,
    'id': 3,
    'name': 'Tourism/Visiting and Travel/International Travel Hacks',
    'appearances': [{'startTime': '0:00:00.68',
      'endTime': '0:00:24.68',
      'startSeconds': 0.7,
      'endSeconds': 24.7}]}]},
 'videos': [{'accountId': 'bebbb8af-2214-4e4c-b673-4b7d605cbc2b',
   'id': '74e2ce28ab',
   'state': 'Processed',
   'moderationState': 'OK',
   'reviewState': 'None',
   'privacyMode': 'Private',
   'processingProgress': '100%',
   'failureMessage': '',
   'externalId': None,
   'externalUrl': None,
   'metadata': None,
   'insights': {'version': '1.0.0.0',
    'duration': '0:00:26.499856',
    'sourceLanguage': 'en-US',
    'sourceLanguages': ['en-US'],
    'language': 'en-US',
    'languages': ['en-US'],
    'transcript': [{'id': 1,
      'text': 'So this is the video.',
      'confidence': 0.4944,
      'speakerId': 4,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:00.68',
        'adjustedEnd': '0:00:02.32',
        'start': '0:00:00.68',
        'end': '0:00:02.32'}]},
     {'id': 2,
      'text': 'Where I am standing in front of the kiosk.',
      'confidence': 0.8505,
      'speakerId': 4,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:02.52',
        'adjustedEnd': '0:00:05.52',
        'start': '0:00:02.52',
        'end': '0:00:05.52'}]},
     {'id': 3,
      'text': 'And I am pressing.',
      'confidence': 0.4944,
      'speakerId': 2,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:06',
        'adjustedEnd': '0:00:07.28',
        'start': '0:00:06',
        'end': '0:00:07.28'}]},
     {'id': 4,
      'text': 'Few buttons and I am insulting my digital.',
      'confidence': 0.6057,
      'speakerId': 4,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:07.48',
        'adjustedEnd': '0:00:12.16',
        'start': '0:00:07.48',
        'end': '0:00:12.16'}]},
     {'id': 5,
      'text': 'Boarding pass.',
      'confidence': 0.4944,
      'speakerId': 5,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:12.4',
        'adjustedEnd': '0:00:13.44',
        'start': '0:00:12.4',
        'end': '0:00:13.44'}]},
     {'id': 6,
      'text': 'And.',
      'confidence': 0.4944,
      'speakerId': 2,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:13.64',
        'adjustedEnd': '0:00:14.2',
        'start': '0:00:13.64',
        'end': '0:00:14.2'}]},
     {'id': 7,
      'text': 'The instructions are there.',
      'confidence': 0.4944,
      'speakerId': 3,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:15.6',
        'adjustedEnd': '0:00:18.08',
        'start': '0:00:15.6',
        'end': '0:00:18.08'}]},
     {'id': 8,
      'text': 'And.',
      'confidence': 0.4944,
      'speakerId': 1,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:19',
        'adjustedEnd': '0:00:19.6',
        'start': '0:00:19',
        'end': '0:00:19.6'}]},
     {'id': 9,
      'text': 'The boarding pass.',
      'confidence': 0.4944,
      'speakerId': 3,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:20.88',
        'adjustedEnd': '0:00:22.24',
        'start': '0:00:20.88',
        'end': '0:00:22.24'}]},
     {'id': 10,
      'text': 'Is accepted.',
      'confidence': 0.4944,
      'speakerId': 5,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:22.24',
        'adjustedEnd': '0:00:23.2',
        'start': '0:00:22.24',
        'end': '0:00:23.2'}]},
     {'id': 11,
      'text': 'And I am good to go.',
      'confidence': 0.4944,
      'speakerId': 1,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:23.4',
        'adjustedEnd': '0:00:24.68',
        'start': '0:00:23.4',
        'end': '0:00:24.68'}]}],
    'keywords': [{'id': 1,
      'text': 'boarding pass',
      'confidence': 0.9975,
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:12.4',
        'adjustedEnd': '0:00:13.44',
        'start': '0:00:12.4',
        'end': '0:00:13.44'},
       {'adjustedStart': '0:00:20.88',
        'adjustedEnd': '0:00:22.24',
        'start': '0:00:20.88',
        'end': '0:00:22.24'}]}],
    'topics': [{'id': 1,
      'name': 'Technology',
      'referenceId': 'Education/Technology',
      'referenceType': 'VideoIndexer',
      'iptcName': 'science and technology',
      'confidence': 0.7411,
      'iabName': 'Education',
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:00.68',
        'adjustedEnd': '0:00:24.68',
        'start': '0:00:00.68',
        'end': '0:00:24.68'}]},
     {'id': 2,
      'name': 'Tourism',
      'referenceId': 'Tourism',
      'referenceType': 'VideoIndexer',
      'iptcName': 'lifestyle and leisure/leisure/travel and tourism/tourism',
      'confidence': 0.6612,
      'iabName': 'Travel',
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:00.68',
        'adjustedEnd': '0:00:24.68',
        'start': '0:00:00.68',
        'end': '0:00:24.68'}]},
     {'id': 3,
      'name': 'International Travel Hacks',
      'referenceId': 'Tourism/Visiting and Travel/International Travel Hacks',
      'referenceType': 'VideoIndexer',
      'iptcName': 'lifestyle and leisure/leisure/travel and tourism/tourism',
      'confidence': 0.5613,
      'iabName': 'Travel',
      'language': 'en-US',
      'instances': [{'adjustedStart': '0:00:00.68',
        'adjustedEnd': '0:00:24.68',
        'start': '0:00:00.68',
        'end': '0:00:24.68'}]}],
    'faces': [{'id': 1000,
      'name': 'Unknown #1',
      'confidence': 0,
      'description': None,
      'thumbnailId': 'e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae',
      'title': None,
      'imageUrl': None,
      'thumbnails': [{'id': '4dcac8c4-73b7-45ba-a637-5dfbe769b11d',
        'fileName': 'FaceInstanceThumbnail_4dcac8c4-73b7-45ba-a637-5dfbe769b11d.jpg',
        'instances': [{'adjustedStart': '0:00:00',
          'adjustedEnd': '0:00:00.0335195',
          'start': '0:00:00',
          'end': '0:00:00.0335195'}]},
       {'id': '3a01d9bf-77d4-46b9-b5ad-a31228bd0cf4',
        'fileName': 'FaceInstanceThumbnail_3a01d9bf-77d4-46b9-b5ad-a31228bd0cf4.jpg',
        'instances': [{'adjustedStart': '0:00:05.2494777',
          'adjustedEnd': '0:00:05.2829972',
          'start': '0:00:05.2494777',
          'end': '0:00:05.2829972'}]},
       {'id': '1166d4ff-1aa2-428e-921f-b1788c728894',
        'fileName': 'FaceInstanceThumbnail_1166d4ff-1aa2-428e-921f-b1788c728894.jpg',
        'instances': [{'adjustedStart': '0:00:10.3860333',
          'adjustedEnd': '0:00:10.4195528',
          'start': '0:00:10.3860333',
          'end': '0:00:10.4195528'}]},
       {'id': 'b84ce045-a98b-4a38-baa4-8df915caa3d8',
        'fileName': 'FaceInstanceThumbnail_b84ce045-a98b-4a38-baa4-8df915caa3d8.jpg',
        'instances': [{'adjustedStart': '0:00:15.5229111',
          'adjustedEnd': '0:00:15.5564306',
          'start': '0:00:15.5229111',
          'end': '0:00:15.5564306'}]},
       {'id': '3a483e3c-7a56-45ee-a861-9f239510c542',
        'fileName': 'FaceInstanceThumbnail_3a483e3c-7a56-45ee-a861-9f239510c542.jpg',
        'instances': [{'adjustedStart': '0:00:20.6610111',
          'adjustedEnd': '0:00:20.6945306',
          'start': '0:00:20.6610111',
          'end': '0:00:20.6945306'}]},
       {'id': 'e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae',
        'fileName': 'FaceInstanceThumbnail_e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae.jpg',
        'instances': [{'adjustedStart': '0:00:25.7994111',
          'adjustedEnd': '0:00:25.8329306',
          'start': '0:00:25.7994111',
          'end': '0:00:25.8329306'}]}],
      'instances': [{'thumbnailsIds': ['1166d4ff-1aa2-428e-921f-b1788c728894',
         '3a483e3c-7a56-45ee-a861-9f239510c542',
         '4dcac8c4-73b7-45ba-a637-5dfbe769b11d',
         '3a01d9bf-77d4-46b9-b5ad-a31228bd0cf4',
         'b84ce045-a98b-4a38-baa4-8df915caa3d8',
         'e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae'],
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:26.4998556',
        'start': '0:00:00',
        'end': '0:00:26.4998556'}]}],
    'labels': [{'id': 1,
      'name': 'wall',
      'referenceId': 'structure/wall',
      'language': 'en-US',
      'instances': [{'confidence': 0.9937,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:00.0334778',
        'start': '0:00:00',
        'end': '0:00:00.0334778'},
       {'confidence': 0.9923,
        'adjustedStart': '0:00:01.1044556',
        'adjustedEnd': '0:00:26.1660778',
        'start': '0:00:01.1044556',
        'end': '0:00:26.1660778'}]},
     {'id': 2,
      'name': 'human face',
      'language': 'en-US',
      'instances': [{'confidence': 0.9913,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:00.0334778',
        'start': '0:00:00',
        'end': '0:00:00.0334778'},
       {'confidence': 0.9927,
        'adjustedStart': '0:00:01.1044556',
        'adjustedEnd': '0:00:26.1660778',
        'start': '0:00:01.1044556',
        'end': '0:00:26.1660778'}]},
     {'id': 3,
      'name': 'person',
      'referenceId': 'person',
      'language': 'en-US',
      'instances': [{'confidence': 0.9906,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:00.0334778',
        'start': '0:00:00',
        'end': '0:00:00.0334778'},
       {'confidence': 0.9906,
        'adjustedStart': '0:00:01.1044556',
        'adjustedEnd': '0:00:26.1660778',
        'start': '0:00:01.1044556',
        'end': '0:00:26.1660778'}]},
     {'id': 4,
      'name': 'indoor',
      'language': 'en-US',
      'instances': [{'confidence': 0.9894,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:00.0334778',
        'start': '0:00:00',
        'end': '0:00:00.0334778'},
       {'confidence': 0.9875,
        'adjustedStart': '0:00:01.1044556',
        'adjustedEnd': '0:00:26.1660778',
        'start': '0:00:01.1044556',
        'end': '0:00:26.1660778'}]}],
    'scenes': [{'id': 1,
      'instances': [{'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:26.4998556',
        'start': '0:00:00',
        'end': '0:00:26.4998556'}]}],
    'shots': [{'id': 1,
      'tags': ['ExtremeCloseUp', 'CenterFace'],
      'keyFrames': [{'id': 1,
        'instances': [{'thumbnailId': '6ff7b62d-c941-492b-a802-651b50ba4b07',
          'adjustedStart': '0:00:00.2672778',
          'adjustedEnd': '0:00:00.3007667',
          'start': '0:00:00.2672778',
          'end': '0:00:00.3007667'}]},
       {'id': 2,
        'instances': [{'thumbnailId': 'be08328e-2ab4-4c99-b5c3-aa9a4278e780',
          'adjustedStart': '0:00:00.3007667',
          'adjustedEnd': '0:00:00.3342556',
          'start': '0:00:00.3007667',
          'end': '0:00:00.3342556'}]}],
      'instances': [{'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:26.4998556',
        'start': '0:00:00',
        'end': '0:00:26.4998556'}]}],
    'namedLocations': [{'id': 1,
      'name': 'kiosk',
      'referenceId': None,
      'referenceUrl': None,
      'description': None,
      'tags': [],
      'confidence': 0.99,
      'isCustom': False,
      'instances': [{'instanceSource': 'Transcript',
        'adjustedStart': '0:00:02.52',
        'adjustedEnd': '0:00:05.52',
        'start': '0:00:02.52',
        'end': '0:00:05.52'}]}],
    'audioEffects': [{'id': 0,
      'type': 'Silence',
      'instances': [{'confidence': 0.8294,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:01.5',
        'start': '0:00:00',
        'end': '0:00:01.5'},
       {'confidence': 0.7832,
        'adjustedStart': '0:00:18.58',
        'adjustedEnd': '0:00:19.58',
        'start': '0:00:18.58',
        'end': '0:00:19.58'}]}],
    'detectedObjects': [{'id': 1,
      'type': 'PottedPlant',
      'thumbnailId': '74a853f3-0a13-4b86-9f9e-d104a09540cc',
      'displayName': 'potted plant',
      'wikiDataId': 'Q27993793',
      'instances': [{'confidence': 0.441,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:26.499856',
        'start': '0:00:00',
        'end': '0:00:26.499856'}]},
     {'id': 2,
      'type': 'Vase',
      'thumbnailId': '702a3e0f-87c9-4136-b18d-b16a4f7abc30',
      'displayName': 'vase',
      'wikiDataId': 'Q191851',
      'instances': [{'confidence': 0.189,
        'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:25.494804',
        'start': '0:00:00',
        'end': '0:00:25.494804'},
       {'confidence': 0.179,
        'adjustedStart': '0:00:25.963828',
        'adjustedEnd': '0:00:26.499856',
        'start': '0:00:25.963828',
        'end': '0:00:26.499856'}]}],
    'sentiments': [{'id': 1,
      'averageScore': 0.5,
      'sentimentType': 'Neutral',
      'instances': [{'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:07.48',
        'start': '0:00:00',
        'end': '0:00:07.48'},
       {'adjustedStart': '0:00:12.16',
        'adjustedEnd': '0:00:26.499856',
        'start': '0:00:12.16',
        'end': '0:00:26.499856'}]},
     {'id': 2,
      'averageScore': 0.1867,
      'sentimentType': 'Negative',
      'instances': [{'adjustedStart': '0:00:07.48',
        'adjustedEnd': '0:00:12.16',
        'start': '0:00:07.48',
        'end': '0:00:12.16'}]}],
    'emotions': [{'id': 1,
      'type': 'Anger',
      'instances': [{'confidence': 0.9334,
        'adjustedStart': '0:00:07.48',
        'adjustedEnd': '0:00:12.16',
        'start': '0:00:07.48',
        'end': '0:00:12.16'}]}],
    'blocks': [{'id': 0,
      'instances': [{'adjustedStart': '0:00:00',
        'adjustedEnd': '0:00:26.499856',
        'start': '0:00:00',
        'end': '0:00:26.499856'}]}],
    'speakers': [{'id': 1,
      'name': 'Speaker #1',
      'instances': [{'adjustedStart': '0:00:19',
        'adjustedEnd': '0:00:19.6',
        'start': '0:00:19',
        'end': '0:00:19.6'},
       {'adjustedStart': '0:00:23.4',
        'adjustedEnd': '0:00:24.68',
        'start': '0:00:23.4',
        'end': '0:00:24.68'}]},
     {'id': 2,
      'name': 'Speaker #2',
      'instances': [{'adjustedStart': '0:00:06',
        'adjustedEnd': '0:00:07.28',
        'start': '0:00:06',
        'end': '0:00:07.28'},
       {'adjustedStart': '0:00:13.64',
        'adjustedEnd': '0:00:14.2',
        'start': '0:00:13.64',
        'end': '0:00:14.2'}]},
     {'id': 3,
      'name': 'Speaker #3',
      'instances': [{'adjustedStart': '0:00:15.6',
        'adjustedEnd': '0:00:18.08',
        'start': '0:00:15.6',
        'end': '0:00:18.08'},
       {'adjustedStart': '0:00:20.88',
        'adjustedEnd': '0:00:22.24',
        'start': '0:00:20.88',
        'end': '0:00:22.24'}]},
     {'id': 4,
      'name': 'Speaker #4',
      'instances': [{'adjustedStart': '0:00:00.68',
        'adjustedEnd': '0:00:02.32',
        'start': '0:00:00.68',
        'end': '0:00:02.32'},
       {'adjustedStart': '0:00:02.52',
        'adjustedEnd': '0:00:05.52',
        'start': '0:00:02.52',
        'end': '0:00:05.52'},
       {'adjustedStart': '0:00:07.48',
        'adjustedEnd': '0:00:12.16',
        'start': '0:00:07.48',
        'end': '0:00:12.16'}]},
     {'id': 5,
      'name': 'Speaker #5',
      'instances': [{'adjustedStart': '0:00:12.4',
        'adjustedEnd': '0:00:13.44',
        'start': '0:00:12.4',
        'end': '0:00:13.44'},
       {'adjustedStart': '0:00:22.24',
        'adjustedEnd': '0:00:23.2',
        'start': '0:00:22.24',
        'end': '0:00:23.2'}]}],
    'textualContentModeration': {'id': 0,
     'bannedWordsCount': 0,
     'bannedWordsRatio': 0,
     'instances': []},
    'statistics': {'correspondenceCount': 9,
     'speakerTalkToListenRatio': {'1': 0.099,
      '2': 0.097,
      '3': 0.203,
      '4': 0.493,
      '5': 0.105},
     'speakerLongestMonolog': {'1': 1, '2': 1, '3': 2, '4': 4, '5': 1},
     'speakerNumberOfFragments': {'1': 2, '2': 2, '3': 2, '4': 3, '5': 2},
     'speakerWordCount': {'1': 7, '2': 5, '3': 7, '4': 22, '5': 4}}},
   'thumbnailId': '0db877fa-54fa-452b-bc3f-1072548ff26e',
   'width': 1080,
   'height': 720,
   'detectSourceLanguage': False,
   'languageAutoDetectMode': 'None',
   'sourceLanguage': 'en-US',
   'sourceLanguages': ['en-US'],
   'language': 'en-US',
   'languages': ['en-US'],
   'indexingPreset': 'Default',
   'streamingPreset': 'Default',
   'linguisticModelId': '00000000-0000-0000-0000-000000000000',
   'personModelId': '00000000-0000-0000-0000-000000000000',
   'logoGroupId': None,
   'isAdult': False,
   'publishedUrl': 'https://rodmandev.streaming.mediaservices.windows.net/64a55f3a-8b6b-4df6-bc8a-ee67a4d70765/fcfade0e-f44b-43fe-9198-2e194c5b.ism/manifest(encryption=cbc)',
   'publishedProxyUrl': None,
   'viewToken': 'Bearer=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1cm46bWljcm9zb2Z0OmF6dXJlOm1lZGlhc2VydmljZXM6Y29udGVudGtleWlkZW50aWZpZXIiOiJiNmVmZGNmZi1lOWQ3LTRiMWEtODVjYi00NGNjODVjOWNiZjYiLCJuYmYiOjE2OTg2MzM1MDIsImV4cCI6MTY5ODY3Njc2MiwiaXNzIjoiaHR0cHM6Ly9icmVha2Rvd24ubWUiLCJhdWQiOiJCcmVha2Rvd25Vc2VyIn0.NyXRZu5Lgr8feGfkqgFy5pmun6ZU8m0ltZHBjLUCjjk'}],
 'videosRanges': [{'videoId': '74e2ce28ab',
   'range': {'start': '0:00:00', 'end': '0:00:26.499856'}}]}
In [7]:
info = video_analysis.get_video_info(video_id, video_language='English')
Getting video info for: 74e2ce28ab
In [8]:
if len(info['videos'][0]['insights']['faces'][0]['thumbnails']):
    print("We found {} faces in this video.".format(str(len(info['videos'][0]['insights']['faces'][0]['thumbnails']))))
We found 6 faces in this video.
In [9]:
info['summarizedInsights']['sentiments']
Out[9]:
[{'sentimentKey': 'Neutral',
  'seenDurationRatio': 0.8226,
  'appearances': [{'startTime': '0:00:00',
    'endTime': '0:00:07.48',
    'startSeconds': 0,
    'endSeconds': 7.5},
   {'startTime': '0:00:12.16',
    'endTime': '0:00:26.499856',
    'startSeconds': 12.2,
    'endSeconds': 26.5}]},
 {'sentimentKey': 'Negative',
  'seenDurationRatio': 0.1774,
  'appearances': [{'startTime': '0:00:07.48',
    'endTime': '0:00:12.16',
    'startSeconds': 7.5,
    'endSeconds': 12.2}]}]
In [10]:
info['summarizedInsights']['emotions']
Out[10]:
[{'type': 'Anger',
  'seenDurationRatio': 0.1774,
  'appearances': [{'startTime': '0:00:07.48',
    'endTime': '0:00:12.16',
    'startSeconds': 7.5,
    'endSeconds': 12.2}]}]
In [11]:
images = []
img_raw = []
img_strs = []
for each_thumb in info['videos'][0]['insights']['faces'][0]['thumbnails']:
    if 'fileName' in each_thumb and 'id' in each_thumb:
        file_name = each_thumb['fileName']
        thumb_id = each_thumb['id']
        img_code = video_analysis.get_thumbnail_from_video_indexer(video_id,  thumb_id)
        img_strs.append(img_code)
        img_stream = io.BytesIO(img_code)
        img_raw.append(img_stream)
        img = Image.open(img_stream)
        images.append(img)
Getting thumbnail from video: 74e2ce28ab, thumbnail: 4dcac8c4-73b7-45ba-a637-5dfbe769b11d
Getting thumbnail from video: 74e2ce28ab, thumbnail: 3a01d9bf-77d4-46b9-b5ad-a31228bd0cf4
Getting thumbnail from video: 74e2ce28ab, thumbnail: 1166d4ff-1aa2-428e-921f-b1788c728894
Getting thumbnail from video: 74e2ce28ab, thumbnail: b84ce045-a98b-4a38-baa4-8df915caa3d8
Getting thumbnail from video: 74e2ce28ab, thumbnail: 3a483e3c-7a56-45ee-a861-9f239510c542
Getting thumbnail from video: 74e2ce28ab, thumbnail: e2f1baa2-3a5d-4cb1-8ec8-a5ac8930deae
In [12]:
for img in images:
    print(img.info)
    plt.figure()
    plt.imshow(img)
{'comment': b'Lavc59.37.100\x00'}
{'comment': b'Lavc59.37.100\x00'}
{'comment': b'Lavc59.37.100\x00'}
{'comment': b'Lavc59.37.100\x00'}
{'comment': b'Lavc59.37.100\x00'}
{'comment': b'Lavc59.37.100\x00'}
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [13]:
i = 1
for img in images:
    print(type(img))
    img.save('kiosk-face' + str(i) + '.jpg')
    i= i+ 1
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'PIL.JpegImagePlugin.JpegImageFile'>
In [14]:
# Create a client
KEY = ""
ENDPOINT = ""
# Create an authenticated FaceClient.
face_client = FaceClient(ENDPOINT, CognitiveServicesCredentials(KEY))
In [15]:
def test_azure_face_api():
    try:
        # Using a sample image URL. You can replace with any valid image URL.
        image_url = 'https://raw.githubusercontent.com/Microsoft/Cognitive-Face-Windows/master/Data/detection1.jpg'
        detected_faces = face_client.face.detect_with_url(image_url)
        
        if detected_faces:
            print(f"Detected {len(detected_faces)} face(s) in the sample image.")
            return True
        else:
            print("No faces detected in the sample image, but the API key and endpoint are valid.")
            return True
    except Exception as e:
        print(f"Error: {e}")
        return False

# Test the Face API
test_azure_face_api()
Detected 1 face(s) in the sample image.
Out[15]:
True
In [16]:
face_client.api_version
Out[16]:
'1.0'
In [17]:
PERSON_GROUP_ID = str(uuid.uuid4())
person_group_name = 'person-avkash-kiosk'
In [18]:
## This code is taken from Azure face SDK 
## ---------------------------------------
def build_person_group(client, person_group_id, pgp_name):
    print('Create and build a person group...')
    # Create empty Person Group. Person Group ID must be lower case, alphanumeric, and/or with '-', '_'.
    print('Person group ID:', person_group_id)
    client.person_group.create(person_group_id = person_group_id, name=person_group_id)

    # Create a person group person.
    my_face = client.person_group_person.create(person_group_id, pgp_name)
    # Find all jpeg images of human in working directory.
    my_face_images = [file for file in glob.glob('*.jpg') if file.startswith("kiosk-face")]
    # Add images to a Person object
    for image_p in my_face_images:
        with open(image_p, 'rb') as w:
            client.person_group_person.add_face_from_stream(person_group_id, my_face.person_id, w)

    # Train the person group, after a Person object with many images were added to it.
    client.person_group.train(person_group_id)

    # Wait for training to finish.
    while (True):
        training_status = client.person_group.get_training_status(person_group_id)
        print("Training status: {}.".format(training_status.status))
        if (training_status.status is TrainingStatusType.succeeded):
            break
        elif (training_status.status is TrainingStatusType.failed):
            client.person_group.delete(person_group_id=PERSON_GROUP_ID)
            sys.exit('Training the person group has failed.')
        time.sleep(5)
In [19]:
build_person_group(face_client, PERSON_GROUP_ID, person_group_name)
Create and build a person group...
Person group ID: e48cf1d7-347a-417a-a338-ed491c0d44ed
Training status: TrainingStatusType.running.
Training status: TrainingStatusType.succeeded.
In [20]:
dl_faces = face_client.face.detect_with_url("https://raw.githubusercontent.com/natebuel29/cd0461-building-computer-vision-solutions-with-azure-project-starter/master/starter/digital_id_template/ca-dl-avkash.png")
dl_faces[0].face_id
Out[20]:
'653b78f6-a9bb-4e33-ab36-0f8d3472f16e'
In [21]:
# Enter the face ID of ca-dl-sample.png from the output of the cell above
get_the_face_id_from_the_sample = '20320a97-2402-4c82-9a09-3510491fcf2f'
In [22]:
person_gp_results = face_client.face.identify([get_the_face_id_from_the_sample], PERSON_GROUP_ID)
In [23]:
for result in person_gp_results:
    if result.candidates:
        for candidate in result.candidates:
            print("The Identity match confidence is {}".format(candidate.confidence))
    else:
        print("Can't verify the identity with the person group")
The Identity match confidence is 0.8137
In [24]:
def process_video(video_name):
    uploaded_video_id = video_analysis.upload_to_video_indexer(
        input_filename=video_name + '.mp4',
        video_name=video_name,
        video_language='English'
    )

    video_analysis.check_access_token()
    info = video_analysis.get_video_info(uploaded_video_id)

    # Single progress bar integration
    while not info or 'videos' not in info or not info['videos'] or 'processingProgress' in info['videos'][0] and info['videos'][0]['processingProgress'] != '100%':
        if 'processingProgress' in info['videos'][0]:
            percentage = info['videos'][0]['processingProgress']
        else:
            percentage = "0%"
    
        sys.stdout.write("\rProgress: [{}] {} ".format("#" * int(percentage.replace("%", "")) + "-" * (100 - int(percentage.replace("%", ""))), percentage))
        sys.stdout.flush()
        time.sleep(10)
        info = video_analysis.get_video_info(uploaded_video_id)

    sys.stdout.write("\n")  # Newline after progress bar completion

    if not info:
        print("\nNo info returned from video indexer for video {}".format(video_name))
        return

    video_data = info['videos'][0]

    if 'insights' not in video_data or 'faces' not in video_data['insights'] or not video_data['insights']['faces']:
        print("\nNo face insights found in the video {}".format(video_name))
        return

    face_data = video_data['insights']['faces'][0]
    if 'thumbnails' not in face_data or not face_data['thumbnails']:
        print("\nNo face thumbnails found in the video {}".format(video_name))
        return

    print("\nWe found {} faces in {}.".format(str(len(face_data['thumbnails'])), video_name))

    images = []
    for each_thumb in face_data['thumbnails']:
        if 'fileName' in each_thumb and 'id' in each_thumb:
            thumb_id = each_thumb['id']
            img_code = video_analysis.get_thumbnail_from_video_indexer(uploaded_video_id, thumb_id)
            img_stream = io.BytesIO(img_code)
            img = Image.open(img_stream)
            images.append(img)

    # Capture and save only 5 thumbnails directly to the local directory (not in a subdirectory)
    for i, img in enumerate(images[:5]):
        thumbnail_path = os.path.join(os.getcwd(), video_name + '-face' + str(i + 1) + '.jpg')
        img.save(thumbnail_path)
        imshow(img)
        plt.show()

videos = ['ca-dl-avkash-chauhan', 'ca-dl-james-jackson', 'ca-dl-james-webb', 'ca-dl-libby-herold', 'ca-dl-radha-s-kumar']

for video in videos:
    process_video(video)
Uploading video to video indexer...
Getting video info for: 150aa39b7f
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: 150aa39b7f
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: 150aa39b7f
Video still processing, current status: 18%
Progress: [##################----------------------------------------------------------------------------------] 18% Getting video info for: 150aa39b7f
Video still processing, current status: 18%
Progress: [##################----------------------------------------------------------------------------------] 18% Getting video info for: 150aa39b7f
Video still processing, current status: 18%
Progress: [##################----------------------------------------------------------------------------------] 18% Getting video info for: 150aa39b7f
Video still processing, current status: 18%
Progress: [##################----------------------------------------------------------------------------------] 18% Getting video info for: 150aa39b7f
Video still processing, current status: 18%
Progress: [##################----------------------------------------------------------------------------------] 18% Getting video info for: 150aa39b7f
Video still processing, current status: 84%
Progress: [####################################################################################----------------] 84% Getting video info for: 150aa39b7f


We found 6 faces in ca-dl-avkash-chauhan.
Getting thumbnail from video: 150aa39b7f, thumbnail: fc5007df-2309-432e-aa27-3be13843ce3c
Getting thumbnail from video: 150aa39b7f, thumbnail: cb7dfa45-c856-42ff-889d-66507fee4982
Getting thumbnail from video: 150aa39b7f, thumbnail: 2fa9d2bb-dc24-4ac6-976e-6729af34e7af
Getting thumbnail from video: 150aa39b7f, thumbnail: 0063d4f7-1770-419d-978f-2b68e2a45b4f
Getting thumbnail from video: 150aa39b7f, thumbnail: 07edd07e-29e5-4756-b59a-ebbf441bf6f2
Getting thumbnail from video: 150aa39b7f, thumbnail: ce886c04-156f-479a-aeec-fb4af4daa940
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Uploading video to video indexer...
Getting video info for: ad094bf314
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: ad094bf314
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ad094bf314
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ad094bf314
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ad094bf314
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ad094bf314
Video still processing, current status: 84%
Progress: [####################################################################################----------------] 84% Getting video info for: ad094bf314


We found 11 faces in ca-dl-james-jackson.
Getting thumbnail from video: ad094bf314, thumbnail: 0020251c-6bac-4d21-bb39-56bb632e7f6a
Getting thumbnail from video: ad094bf314, thumbnail: 2f03111c-69ab-46e3-a091-a3db03771ea5
Getting thumbnail from video: ad094bf314, thumbnail: 029d3ab4-984e-4a39-a9f2-22d7d36a6e32
Getting thumbnail from video: ad094bf314, thumbnail: e6505b2c-7c2d-4051-9db3-be6104e943f5
Getting thumbnail from video: ad094bf314, thumbnail: bed3d21f-a9dd-43d4-a3a2-ea4642381db5
Getting thumbnail from video: ad094bf314, thumbnail: 06675e4f-e941-4333-9681-e721f43aee06
Getting thumbnail from video: ad094bf314, thumbnail: 72f6113b-4f7a-45b5-bdd9-3ecaa9a8b1c2
Getting thumbnail from video: ad094bf314, thumbnail: 37c62270-645d-4cc2-8909-6cf195a655fe
Getting thumbnail from video: ad094bf314, thumbnail: 18c32fbc-90b8-4cca-8426-aecb24b91ae9
Getting thumbnail from video: ad094bf314, thumbnail: 0f6db989-39ce-4466-9e61-7dec76f599e9
Getting thumbnail from video: ad094bf314, thumbnail: 12aa02fa-33bd-4a92-8677-b4346c0f4e11
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Uploading video to video indexer...
Getting video info for: b7c80399a3
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: b7c80399a3
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: b7c80399a3
Video still processing, current status: 40%
Progress: [########################################------------------------------------------------------------] 40% Getting video info for: b7c80399a3
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: b7c80399a3
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: b7c80399a3
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: b7c80399a3
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: b7c80399a3
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: b7c80399a3
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: b7c80399a3
Video still processing, current status: 83%
Progress: [###################################################################################-----------------] 83% Getting video info for: b7c80399a3


We found 28 faces in ca-dl-james-webb.
Getting thumbnail from video: b7c80399a3, thumbnail: c1e44ba7-d1e2-4e91-9dd1-de3e7710b30c
Getting thumbnail from video: b7c80399a3, thumbnail: 32edbc2b-cdcc-4df9-bebe-1142e4a056ec
Getting thumbnail from video: b7c80399a3, thumbnail: a75633f2-2f4b-4c86-b308-a87fe6a83993
Getting thumbnail from video: b7c80399a3, thumbnail: 2e75e36a-c536-4f38-8f03-6da9c40b1ad4
Getting thumbnail from video: b7c80399a3, thumbnail: d46fa6e3-c3be-4abe-aff7-424aeb149e8a
Getting thumbnail from video: b7c80399a3, thumbnail: 3203771f-eb5e-4728-90ba-8c9bb6c94547
Getting thumbnail from video: b7c80399a3, thumbnail: 792426c6-58ad-4746-b776-d61d097106fa
Getting thumbnail from video: b7c80399a3, thumbnail: 44520d9c-0e37-4afe-aae2-597a81f3827f
Getting thumbnail from video: b7c80399a3, thumbnail: 152a1fb2-af88-46d0-8943-63ff12fe28d9
Getting thumbnail from video: b7c80399a3, thumbnail: 949b467c-8543-4e24-aa1a-0881d9483b48
Getting thumbnail from video: b7c80399a3, thumbnail: dece305d-b106-40a4-935a-da5cb0d29357
Getting thumbnail from video: b7c80399a3, thumbnail: 85257fce-911a-48a9-b318-3c4c4cefdbe2
Getting thumbnail from video: b7c80399a3, thumbnail: d8942c0a-c6c7-4111-932f-02a9127a24da
Getting thumbnail from video: b7c80399a3, thumbnail: fa4f8631-02ab-46e9-a234-a3ebab5b1725
Getting thumbnail from video: b7c80399a3, thumbnail: 3100a062-1328-47fe-bce8-7c33f7ea1284
Getting thumbnail from video: b7c80399a3, thumbnail: ae6b75bd-1429-4e09-8a0c-4962041ffff5
Getting thumbnail from video: b7c80399a3, thumbnail: a58c8c37-d389-4f5d-a344-9ea4d848ce0a
Getting thumbnail from video: b7c80399a3, thumbnail: 2eda56b2-1ac7-4742-ad27-10b9b460ce55
Getting thumbnail from video: b7c80399a3, thumbnail: c088595d-c26d-4fc2-a8f8-7e2e30e20625
Getting thumbnail from video: b7c80399a3, thumbnail: 6f4d7005-d274-4536-8fc4-e2e0f09a6a1a
Getting thumbnail from video: b7c80399a3, thumbnail: 04fa75b7-87a7-49f3-8830-4b2654e0af27
Getting thumbnail from video: b7c80399a3, thumbnail: b0a88229-f9cd-4c6c-aaa7-ee415fe8672c
Getting thumbnail from video: b7c80399a3, thumbnail: 31db3d03-9fdf-4df5-9133-dea0c1bfb63d
Getting thumbnail from video: b7c80399a3, thumbnail: 9c1c1059-e849-49a0-abd2-251c74297e47
Getting thumbnail from video: b7c80399a3, thumbnail: 0bbc5431-ebfb-4758-9ded-257af7d161ac
Getting thumbnail from video: b7c80399a3, thumbnail: 9105a58d-4ab2-4bd7-9ab4-100375c8cd23
Getting thumbnail from video: b7c80399a3, thumbnail: 772a3c24-5a28-4688-9cfe-d615a34253d0
Getting thumbnail from video: b7c80399a3, thumbnail: 04e20657-1379-49c7-a0c4-6a40e4f38af9
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Uploading video to video indexer...
Getting video info for: bf1c9fe3e0
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: bf1c9fe3e0
Video still processing, current status: 13%
Progress: [#############---------------------------------------------------------------------------------------] 13% Getting video info for: bf1c9fe3e0
Video still processing, current status: 13%
Progress: [#############---------------------------------------------------------------------------------------] 13% Getting video info for: bf1c9fe3e0
Video still processing, current status: 13%
Progress: [#############---------------------------------------------------------------------------------------] 13% Getting video info for: bf1c9fe3e0
Video still processing, current status: 40%
Progress: [########################################------------------------------------------------------------] 40% Getting video info for: bf1c9fe3e0
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: bf1c9fe3e0
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: bf1c9fe3e0
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: bf1c9fe3e0
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: bf1c9fe3e0
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: bf1c9fe3e0
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: bf1c9fe3e0
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: bf1c9fe3e0


We found 28 faces in ca-dl-libby-herold.
Getting thumbnail from video: bf1c9fe3e0, thumbnail: e3bed621-3529-4d54-aba7-dda3b76d1593
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 78d920df-1611-49a5-80dd-9551285219b7
Getting thumbnail from video: bf1c9fe3e0, thumbnail: d47d7961-73a1-49d4-80dd-1110ae3b48d7
Getting thumbnail from video: bf1c9fe3e0, thumbnail: f5056dda-68c7-4f0c-8490-f919115654e8
Getting thumbnail from video: bf1c9fe3e0, thumbnail: f0c60714-b215-415b-aae5-fbc7ecd0b9b7
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 78b59a98-c3c6-41f9-96bf-d6cc3538a3db
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 188cca3f-fe0f-4062-873a-876d4cbad74d
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 38d810c5-b176-408c-8a05-27c00690bb98
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 50cc0bf9-ca8b-40d5-9060-c063eac12941
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 9b3b92ab-1c96-4670-b959-c15bbb8ad02f
Getting thumbnail from video: bf1c9fe3e0, thumbnail: d52d1b31-ebdb-44a6-a2a5-31e23ed6a585
Getting thumbnail from video: bf1c9fe3e0, thumbnail: e2caef9b-2cd9-4dd8-9488-77bd8096fa55
Getting thumbnail from video: bf1c9fe3e0, thumbnail: ba4b53f4-962f-4a66-a6c7-43413d6b5093
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 4eb5cc5a-e934-44fd-b5e0-3205739bda3a
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 3d6de974-5d5e-4614-b15b-056825955b1f
Getting thumbnail from video: bf1c9fe3e0, thumbnail: f57e7b98-c311-463d-ace6-98cbacaa8690
Getting thumbnail from video: bf1c9fe3e0, thumbnail: ba19ced9-644a-440e-849b-bcc8f31737b4
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 30d002c0-ebc1-45ab-9db7-aa433d520d00
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 418948e0-2c84-4a38-a4ed-570052c39858
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 4aa2614e-f231-4664-b098-3670d7c4e996
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 287184e8-dd7e-4702-ac3a-866d217436ff
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 7e34916c-b80d-4f8e-ba07-ffc0018975a6
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 451fb1a7-c62c-4b74-a040-cd848ca292cb
Getting thumbnail from video: bf1c9fe3e0, thumbnail: b73f1d4d-8119-4311-89f0-35a6b17d8c48
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 3fe24f73-cd83-4aa6-bb69-d6e75aaee1e0
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 66371b57-2dff-4ecd-9acc-25e6270f976a
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 6ded4ec3-de17-4e40-9da2-5ed43c31fcfc
Getting thumbnail from video: bf1c9fe3e0, thumbnail: 27b0dd59-4da3-4389-98e8-c6d4febc2221
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Uploading video to video indexer...
Getting video info for: ebd7808288
Video still processing, current status: 5%
Progress: [#####-----------------------------------------------------------------------------------------------] 5% Getting video info for: ebd7808288
Video still processing, current status: 13%
Progress: [#############---------------------------------------------------------------------------------------] 13% Getting video info for: ebd7808288
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ebd7808288
Video still processing, current status: 15%
Progress: [###############-------------------------------------------------------------------------------------] 15% Getting video info for: ebd7808288
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ebd7808288
Video still processing, current status: 70%
Progress: [######################################################################------------------------------] 70% Getting video info for: ebd7808288
Video still processing, current status: 71%
Progress: [#######################################################################-----------------------------] 71% Getting video info for: ebd7808288
Video still processing, current status: 84%
Progress: [####################################################################################----------------] 84% Getting video info for: ebd7808288


We found 8 faces in ca-dl-radha-s-kumar.
Getting thumbnail from video: ebd7808288, thumbnail: 19a5b01b-66d2-4120-89d1-3a595a874bf4
Getting thumbnail from video: ebd7808288, thumbnail: f8366af0-170d-4e12-b312-0b290a61d194
Getting thumbnail from video: ebd7808288, thumbnail: c39e2c3b-27bf-4c69-a237-15ffe59e57d1
Getting thumbnail from video: ebd7808288, thumbnail: 06e481b8-21f3-4aa5-a857-b9c371dc6037
Getting thumbnail from video: ebd7808288, thumbnail: b0773994-cb66-4b1c-af67-2331003da31f
Getting thumbnail from video: ebd7808288, thumbnail: 24ea36d6-5416-4a30-9e31-0de1b728720c
Getting thumbnail from video: ebd7808288, thumbnail: 1d4a226c-f446-4f23-95f8-02f151080a07
Getting thumbnail from video: ebd7808288, thumbnail: be584804-9a25-4551-9b00-8ee8082ceaac
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [25]:
AZURE_FORM_RECOGNIZER_ENDPOINT = ""
AZURE_FORM_RECOGNIZER_KEY = ""
endpoint = AZURE_FORM_RECOGNIZER_ENDPOINT
key = AZURE_FORM_RECOGNIZER_KEY
form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=AzureKeyCredential(key))
# Create a client for Face API


ID_URLs = [
    'https://mymlworkspace6225174622.blob.core.windows.net/digitalidface/ca-dl-avkash-chauhan.png', 
    'https://mymlworkspace6225174622.blob.core.windows.net/digitalidface/ca-dl-james-jackson.png', 
    'https://mymlworkspace6225174622.blob.core.windows.net/digitalidface/ca-dl-james-webb.png', 
    'https://mymlworkspace6225174622.blob.core.windows.net/digitalidface/ca-dl-libby-herold.png', 
    'https://mymlworkspace6225174622.blob.core.windows.net/digitalidface/ca-dl-radha-s-kumar.png'
]

PERSON_GROUP_ID = str(uuid.uuid4())

def print_id_card_details(identity_card):
    first_name = identity_card.fields.get("FirstName")
    last_name = identity_card.fields.get("LastName")
    if first_name and last_name:
        return f"{first_name.value} {last_name.value}"
    else:
        print("Name not found!")
        return None

def extract_data_from_ids(form_recognizer_client, url):
    try:
        id_content_from_url = form_recognizer_client.begin_recognize_identity_documents_from_url(url)
        collected_id_cards = id_content_from_url.result()
        if collected_id_cards:
            return print_id_card_details(collected_id_cards[0])
        else:
            print(f"No data found for URL: {url}")
            return None
    except Exception as e:
        print(f"Error processing URL {url}: {str(e)}")
        return None

def identify_face(image_url, person_group_id):
    identity_results = {}
    try:
        detected_faces = face_client.face.detect_with_url(image_url)
        if not detected_faces:
            print(f"No faces detected in {image_url}")
            return identity_results

        face_ids = [face.face_id for face in detected_faces]
        results = face_client.face.identify(face_ids, person_group_id)

        for result in results:
            if result.candidates:
                for candidate in result.candidates:
                    name_from_id = extract_data_from_ids(form_recognizer_client, image_url)
                    if name_from_id:
                        is_confident = candidate.confidence > 0.5
                        identity_results[name_from_id] = is_confident
        return identity_results
    except Exception as e:
        print(f"Error encountered during identification: {e}")
        return identity_results
# Create and train person group
def build_person_group(client, person_group_id):
    try:
        print('Create and build a person group...')
        # Create empty Person Group.
        client.person_group.create(person_group_id=person_group_id, name=person_group_id)

        for url in ID_URLs:
            person_name = url.split('/')[-1].replace('.png', '')
            print(f"Processing for {person_name}")
            person = client.person_group_person.create(person_group_id, person_name)

            # Use thumbnails from the local directory that match the person_name
            my_face_images = glob.glob(person_name + '-face*.jpg')
            if not my_face_images:
                print(f"No local images found for {person_name}")

            for image_path in my_face_images:
                print(f"Processing image {image_path}")
                with open(image_path, 'rb') as w:
                    detected_faces = face_client.face.detect_with_stream(w)
                    if not detected_faces:
                        print(f"No faces detected in {image_path}. Skipping this image.")
                        continue

                    # Add image to person group if a face is detected
                    w.seek(0)  # Reset stream position
                    client.person_group_person.add_face_from_stream(person_group_id, person.person_id, w)
                    print(f"Added face from {image_path} to {person_name}")

        # Train the person group
        print("Training the person group...")
        client.person_group.train(person_group_id)

        while True:
            training_status = client.person_group.get_training_status(person_group_id)
            print("Training status: {}.".format(training_status.status))
            if training_status.status is TrainingStatusType.succeeded:
                print("Training succeeded!")
                break
            elif training_status.status is TrainingStatusType.failed:
                sys.exit('Training the person group has failed.')
            time.sleep(5)
    except Exception as e:
        print(f"Error encountered: {e}")


# Build and train person group
build_person_group(face_client, PERSON_GROUP_ID)

# Identify each ID_URL with the trained person group
valid_names_dict = {}
for url in ID_URLs:
    results = identify_face(url, PERSON_GROUP_ID)
    valid_names_dict.update(results)

print(valid_names_dict)
Create and build a person group...
Processing for ca-dl-avkash-chauhan
Processing image ca-dl-avkash-chauhan-face1.jpg
Added face from ca-dl-avkash-chauhan-face1.jpg to ca-dl-avkash-chauhan
Processing image ca-dl-avkash-chauhan-face2.jpg
Added face from ca-dl-avkash-chauhan-face2.jpg to ca-dl-avkash-chauhan
Processing image ca-dl-avkash-chauhan-face3.jpg
Added face from ca-dl-avkash-chauhan-face3.jpg to ca-dl-avkash-chauhan
Processing image ca-dl-avkash-chauhan-face4.jpg
Added face from ca-dl-avkash-chauhan-face4.jpg to ca-dl-avkash-chauhan
Processing image ca-dl-avkash-chauhan-face5.jpg
Added face from ca-dl-avkash-chauhan-face5.jpg to ca-dl-avkash-chauhan
Processing for ca-dl-james-jackson
Processing image ca-dl-james-jackson-face1.jpg
Added face from ca-dl-james-jackson-face1.jpg to ca-dl-james-jackson
Processing image ca-dl-james-jackson-face2.jpg
Added face from ca-dl-james-jackson-face2.jpg to ca-dl-james-jackson
Processing image ca-dl-james-jackson-face3.jpg
Added face from ca-dl-james-jackson-face3.jpg to ca-dl-james-jackson
Processing image ca-dl-james-jackson-face4.jpg
Added face from ca-dl-james-jackson-face4.jpg to ca-dl-james-jackson
Processing image ca-dl-james-jackson-face5.jpg
No faces detected in ca-dl-james-jackson-face5.jpg. Skipping this image.
Processing for ca-dl-james-webb
Processing image ca-dl-james-webb-face1.jpg
Added face from ca-dl-james-webb-face1.jpg to ca-dl-james-webb
Processing image ca-dl-james-webb-face2.jpg
Added face from ca-dl-james-webb-face2.jpg to ca-dl-james-webb
Processing image ca-dl-james-webb-face3.jpg
No faces detected in ca-dl-james-webb-face3.jpg. Skipping this image.
Processing image ca-dl-james-webb-face4.jpg
Added face from ca-dl-james-webb-face4.jpg to ca-dl-james-webb
Processing image ca-dl-james-webb-face5.jpg
No faces detected in ca-dl-james-webb-face5.jpg. Skipping this image.
Processing for ca-dl-libby-herold
Processing image ca-dl-libby-herold-face1.jpg
Added face from ca-dl-libby-herold-face1.jpg to ca-dl-libby-herold
Processing image ca-dl-libby-herold-face2.jpg
Added face from ca-dl-libby-herold-face2.jpg to ca-dl-libby-herold
Processing image ca-dl-libby-herold-face3.jpg
No faces detected in ca-dl-libby-herold-face3.jpg. Skipping this image.
Processing image ca-dl-libby-herold-face4.jpg
Added face from ca-dl-libby-herold-face4.jpg to ca-dl-libby-herold
Processing image ca-dl-libby-herold-face5.jpg
No faces detected in ca-dl-libby-herold-face5.jpg. Skipping this image.
Processing for ca-dl-radha-s-kumar
Processing image ca-dl-radha-s-kumar-face1.jpg
Added face from ca-dl-radha-s-kumar-face1.jpg to ca-dl-radha-s-kumar
Processing image ca-dl-radha-s-kumar-face2.jpg
Added face from ca-dl-radha-s-kumar-face2.jpg to ca-dl-radha-s-kumar
Processing image ca-dl-radha-s-kumar-face3.jpg
No faces detected in ca-dl-radha-s-kumar-face3.jpg. Skipping this image.
Processing image ca-dl-radha-s-kumar-face4.jpg
Added face from ca-dl-radha-s-kumar-face4.jpg to ca-dl-radha-s-kumar
Processing image ca-dl-radha-s-kumar-face5.jpg
Added face from ca-dl-radha-s-kumar-face5.jpg to ca-dl-radha-s-kumar
Training the person group...
Training status: TrainingStatusType.running.
Training status: TrainingStatusType.succeeded.
Training succeeded!
{'AVKASH CHAUHAN CHAUHAN': True, 'James Jackson': True, 'James Webb': True, 'Libby Herold': True, 'Radha SKumar': True}

Step 4¶

In [26]:
PREDICTION_ENDPOINT = ''
prediction_key = ""
prediction_resource_id = ""
In [27]:
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})
predictor = CustomVisionPredictionClient(PREDICTION_ENDPOINT, prediction_credentials)
In [28]:
def perform_prediction(image_file_url):
    # Fetch the image content from the URL
    response = requests.get(image_file_url)
    if response.status_code == 200:
        image_contents = response.content
        
        # Load the image for visualization
        image = Image.open(BytesIO(image_contents))
        width, height = image.size
        
        results = predictor.detect_image("3b8974a9-03cd-436e-8686-cf429d7b0c42", "Iteration5", image_contents)
        
        # Sort the results based on probability and select top 5
        sorted_predictions = sorted(results.predictions, key=lambda x: x.probability, reverse=True)[:5]
        
        # Display the results
        fig, ax = plt.subplots(figsize=(10,10))
        ax.imshow(image)
        ax.axis('off')
        
        # Display the top 5 tags on the image and draw bounding boxes
        for idx, prediction in enumerate(sorted_predictions):
            tag = "{}: {:.2f}%".format(prediction.tag_name, prediction.probability * 100)
            plt.text(10, (idx + 1) * 30, tag, fontsize=12, bbox=dict(facecolor='red', alpha=0.5))
            
            # Draw rectangle around the detected object
            left = prediction.bounding_box.left * width
            top = prediction.bounding_box.top * height
            rect_width = prediction.bounding_box.width * width
            rect_height = prediction.bounding_box.height * height
            
            rect = patches.Rectangle((left, top), rect_width, rect_height, linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
        
        plt.show()
        
    else:
        print("Failed to fetch the image content from the provided URL")

file_name = "https://mymlworkspace6225174622.blob.core.windows.net/lighters/lighter_test_set_1of5.jpg"
perform_prediction(file_name)
No description has been provided for this image

Step 2¶

In [29]:
AZURE_FORM_RECOGNIZER_ENDPOINT = ""
AZURE_FORM_RECOGNIZER_KEY = ""
In [30]:
endpoint = AZURE_FORM_RECOGNIZER_ENDPOINT
key = AZURE_FORM_RECOGNIZER_KEY
In [31]:
form_training_client = FormTrainingClient(endpoint=endpoint, credential=AzureKeyCredential(key))
In [32]:
saved_model_list = form_training_client.list_custom_models()

Training Source Data URL¶

To generate the training data URL, you can

  1. Download the Cognito Corporation training documents to your local system: https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/tree/main/resources/cognito-corp-docs.
  2. Upload the training documents to a blob container at Azure Blob Storage. Training documents are named Cognito-corporation-u*.pdf.
  3. Generate a SAS URL of the training data container.
  4. Once the model is trained, you will use the Cognito-corporation-test01.png file located in this GitHub directory to perform prediction: https://raw.githubusercontent.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/main/resources/Cognito-corporation-test01.png
In [33]:
# Blob Storage for Training Directory

trainingDataUrl = ""

Performing Unlabeled Training¶

In [34]:
training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=False)
custom_model = training_process.result()

Getting Model Info¶

In [35]:
custom_model
Out[35]:
CustomFormModel(model_id=20ee710d-ff42-4688-b5ce-42f457880404, status=ready, training_started_on=2023-10-30 02:51:14+00:00, training_completed_on=2023-10-30 02:51:37+00:00, submodels=[CustomFormSubmodel(accuracy=None, model_id=20ee710d-ff42-4688-b5ce-42f457880404, fields={'field-0': CustomFormModelField(label=Baggage, name=field-0, accuracy=None), 'field-1': CustomFormModelField(label=Boarding Time, name=field-1, accuracy=None), 'field-2': CustomFormModelField(label=Carrier, name=field-2, accuracy=None), 'field-3': CustomFormModelField(label=Chicago, name=field-3, accuracy=None), 'field-4': CustomFormModelField(label=Class, name=field-4, accuracy=None), 'field-5': CustomFormModelField(label=Date, name=field-5, accuracy=None), 'field-6': CustomFormModelField(label=Flight No., name=field-6, accuracy=None), 'field-7': CustomFormModelField(label=From, name=field-7, accuracy=None), 'field-8': CustomFormModelField(label=From:, name=field-8, accuracy=None), 'field-9': CustomFormModelField(label=GATE, name=field-9, a
In [36]:
custom_model.model_id
Out[36]:
'20ee710d-ff42-4688-b5ce-42f457880404'
In [37]:
custom_model.status
Out[37]:
'ready'
In [38]:
custom_model.training_started_on
Out[38]:
datetime.datetime(2023, 10, 30, 2, 51, 14, tzinfo=<isodate.tzinfo.Utc object at 0x000001767AD160D0>)
In [39]:
custom_model.training_completed_on
Out[39]:
datetime.datetime(2023, 10, 30, 2, 51, 37, tzinfo=<isodate.tzinfo.Utc object at 0x000001767AD160D0>)
In [40]:
custom_model.training_documents
Out[40]:
[TrainingDocumentInfo(name=boarding-james-webb.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-libby.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass1.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass10.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass2.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass3.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass4.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass5.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass6.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass7.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass8.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass9.pdf, status=succeeded, page_count=1, errors=[], model_id=None)]
In [41]:
custom_model_info = form_training_client.get_custom_model(model_id=custom_model.model_id)
print("Model ID: {}".format(custom_model_info.model_id))
print("Status: {}".format(custom_model_info.status))
print("Training started on: {}".format(custom_model_info.training_started_on))
print("Training completed on: {}".format(custom_model_info.training_completed_on))
Model ID: 20ee710d-ff42-4688-b5ce-42f457880404
Status: ready
Training started on: 2023-10-30 02:51:14+00:00
Training completed on: 2023-10-30 02:51:37+00:00

Using an image document as test document URL (Not using PDF here)¶

  • Here, you will use the Cognito-corporation-test01.png file located in this GitHub directory to perform prediction: https://raw.githubusercontent.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/main/resources/Cognito-corporation-test01.png
  • Note: If you want to use a PDF document for the test, please save and upload PDF to Azure Blob Storage and use the SAS URL of this PDF document as the target URL.
  • Using a PDF document from the GitHub URL will give you an error.
  • You will see a screenshot of how to do this on the exercise solution page later in this lesson.
In [42]:
new_test_url = ""
In [43]:
form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=AzureKeyCredential(key))
In [44]:
custom_test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(model_id=custom_model_info.model_id, form_url=new_test_url)
In [45]:
custom_test_action_result = custom_test_action.result()
In [46]:
labeled_training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
labeled_custom_model = labeled_training_process.result()
In [47]:
labeled_custom_model.training_documents
Out[47]:
[TrainingDocumentInfo(name=boarding_pass1.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass10.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass2.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass3.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass4.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass5.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass6.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass7.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass8.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass9.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-james-webb.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-libby.pdf, status=succeeded, page_count=1, errors=[], model_id=None)]
In [48]:
labeled_custom_test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(model_id=labeled_custom_model.model_id, form_url=new_test_url)
labeled_custom_test_action_result = labeled_custom_test_action.result()
for recognized_content in labeled_custom_test_action_result:
    print("Form type: {}".format(recognized_content.form_type))
    for name, field in recognized_content.fields.items():
        print("Field '{}' has label '{}' with value '{}' and a confidence score of {}".format(
            name,
            field.label_data.text if field.label_data else name,
            field.value,
            field.confidence
        ))
Form type: custom:6cfe75bb-eadd-4d0a-a059-b708310d743b
Field 'Bag' has label 'Bag' with value 'NO' and a confidence score of 0.598
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.422
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.689
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.653
Field 'Name' has label 'Name' with value 'Avkash Chauhan' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '20A' and a confidence score of 0.988

Listing Models¶

In [49]:
saved_model_list = form_training_client.list_custom_models()
In [50]:
## Creating another model with labeled training
labeled_2_training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
labeled_2_custom_model = labeled_2_training_process.result()
In [51]:
cognito_corporation_model_list = [labeled_custom_model.model_id, labeled_2_custom_model.model_id]
In [52]:
composed_process = form_training_client.begin_create_composed_model(
            cognito_corporation_model_list, model_name="Cognito Corporation Model")
composed_process_model = composed_process.result()
In [53]:
composed_model_info = form_training_client.get_custom_model(model_id=composed_process_model.model_id)
print("Model ID: {}".format(composed_model_info.model_id))
print("Status: {}".format(composed_model_info.status))
print("Training started on: {}".format(composed_model_info.training_started_on))
print("Training completed on: {}".format(composed_model_info.training_completed_on))
Model ID: 83d4a61f-27c0-4565-bb1b-458be09d2846
Status: ready
Training started on: 2023-10-30 02:52:09+00:00
Training completed on: 2023-10-30 02:52:09+00:00

Using the composed model to extract data from the boarding_pass¶

In [54]:
# Global list to store flight-specific information from each boarding pass
flight_info_list = []

# Global array to store the names
names_array = []

def process_training_data_urls(form_recognizer_client, composed_process_model, training_data_urls):
    """
    Process and print recognized forms from a list of training data URLs.
    
    :param form_recognizer_client: The Form Recognizer client.
    :param composed_process_model: The composed model object.
    :param training_data_urls: List of training data URLs to be processed.
    """
    global flight_info_list  # Indicate that we're using the global list
    global names_array  # Indicate that we're using the global array

    # Assuming each URL in the list is a separate training dataset
    for training_data_url in training_data_urls:
        composed_model_testing = form_recognizer_client.begin_recognize_custom_forms_from_url(model_id=composed_process_model.model_id, form_url=training_data_url)
        composed_model_testing_result = composed_model_testing.result()
        for recognized_content in composed_model_testing_result:
            # Create an empty dictionary for each boarding pass
            flight_info_dict = {}
            print("Form type: {}".format(recognized_content.form_type))
            
            for name, field in recognized_content.fields.items():
                # If the field is "Name", append its value to the global array
                if name == "Name":
                    names_array.append(field.value)

                # Populate the dictionary with the desired fields
                if name in ['Flight', 'Name', 'From', 'Carrier', 'Seat', 'Time', 'Date', 'Class', 'To']:
                    flight_info_dict[name] = field.value

                print("Field '{}' has label '{}' with value '{}' and a confidence score of {}".format(
                name,
                field.label_data.text if field.label_data else name,
                field.value,
                field.confidence
                ))
            
            # Append the dictionary to the global list
            flight_info_list.append(flight_info_dict)
            print("-----------------------------------------------------------------")

# Boarding Passes
training_data_urls = [
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass1.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass2.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass3.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass4.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass5.png",
]
process_training_data_urls(form_recognizer_client, composed_process_model, training_data_urls)

# To view the collected flight information
for flight_info in flight_info_list:
    print(flight_info)
print("-----------------------------------------------------------------")

# To view the collected names
print(names_array)
Form type: Cognito Corporation Model:b1f7be88-9c5c-4353-962f-f821a1853ebd
Field 'Bag' has label 'Bag' with value 'NO' and a confidence score of 0.598
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.422
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.689
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.653
Field 'Name' has label 'Name' with value 'Avkash Chauhan' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '20A' and a confidence score of 0.988
-----------------------------------------------------------------
Form type: Cognito Corporation Model:b1f7be88-9c5c-4353-962f-f821a1853ebd
Field 'Bag' has label 'Bag' with value 'YES' and a confidence score of 0.38
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.398
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.641
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.65
Field 'Name' has label 'Name' with value 'James Jackson' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '25B' and a confidence score of 0.987
-----------------------------------------------------------------
Form type: Cognito Corporation Model:b1f7be88-9c5c-4353-962f-f821a1853ebd
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.66
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'Bag' has label 'Bag' with value 'YES' and a confidence score of 0.387
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'Name' has label 'Name' with value 'James Webb' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Class' has label 'Class' with value 'B' and a confidence score of 0.721
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '1A' and a confidence score of 0.986
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.382
-----------------------------------------------------------------
Form type: Cognito Corporation Model:b1f7be88-9c5c-4353-962f-f821a1853ebd
Field 'Bag' has label 'Bag' with value 'YES' and a confidence score of 0.424
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.397
Field 'Class' has label 'Class' with value 'B' and a confidence score of 0.725
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.656
Field 'Name' has label 'Name' with value 'Libby Herold' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '3D' and a confidence score of 0.987
-----------------------------------------------------------------
Form type: Cognito Corporation Model:b1f7be88-9c5c-4353-962f-f821a1853ebd
Field 'Bag' has label 'Bag' with value 'YES' and a confidence score of 0.38
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.381
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.727
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.649
Field 'Name' has label 'Name' with value 'Radha S Kumar' and a confidence score of 0.99
Field 'Seat' has label 'Seat' with value '34B' and a confidence score of 0.987
-----------------------------------------------------------------
{'Time': '10:00 AM PST', 'Class': 'E', 'Flight': '234', 'To': 'Chicago', 'Carrier': 'UA', 'From': 'San Francisco', 'Date': 'April 20, 2022', 'Name': 'Avkash Chauhan', 'Seat': '20A'}
{'Time': '10:00 AM PST', 'Class': 'E', 'Flight': '234', 'To': 'Chicago', 'Carrier': 'UA', 'From': 'San Francisco', 'Date': 'April 20, 2022', 'Name': 'James Jackson', 'Seat': '25B'}
{'Date': 'April 20, 2022', 'Flight': '234', 'Carrier': 'UA', 'Name': 'James Webb', 'Class': 'B', 'From': 'San Francisco', 'Seat': '1A', 'To': 'Chicago', 'Time': '10:00 AM PST'}
{'Time': '10:00 AM PST', 'Class': 'B', 'Flight': '234', 'To': 'Chicago', 'Carrier': 'UA', 'From': 'San Francisco', 'Date': 'April 20, 2022', 'Name': 'Libby Herold', 'Seat': '3D'}
{'Time': '10:00 AM PST', 'Class': 'E', 'Flight': '234', 'To': 'Chicago', 'Carrier': 'UA', 'From': 'San Francisco', 'Date': 'April 20, 2022', 'Name': 'Radha S Kumar', 'Seat': '34B'}
-----------------------------------------------------------------
['Avkash Chauhan', 'James Jackson', 'James Webb', 'Libby Herold', 'Radha S Kumar']

Validate ID Cards¶

In [55]:
content_url_list =[
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-avkash-chauhan.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-jackson.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-webb.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-libby-herold.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-radha-s-kumar.png"
]

Get DOB from IDs¶

In [56]:
# Global dictionary to capture DateOfBirth using full name as the key
dob_dict = {}

def print_id_card_details(identity_card):
    first_name = identity_card.fields.get("FirstName")
    last_name = identity_card.fields.get("LastName")
    full_name = None
    if first_name and last_name:
        full_name = f"{first_name.value} {last_name.value}"
        print(f"Full Name: {full_name}")
    else:
        print("Name not found!")
        return  # Exit the function if full name is not found
    
    dob = identity_card.fields.get("DateOfBirth")
    if dob:
        print("Date of Birth: {} has confidence: {}".format(dob.value, dob.confidence))
        # Add DateOfBirth to the global dictionary using full name as the key
        dob_dict[full_name] = dob.value


def extract_data_from_ids(form_recognizer_client, url_list):
    for url in url_list:
        print(f"Processing URL: {url}")
        try:
            id_content_from_url = form_recognizer_client.begin_recognize_identity_documents_from_url(url)
            collected_id_cards = id_content_from_url.result()
            if collected_id_cards:
                print_id_card_details(collected_id_cards[0])
            else:
                print(f"No data found for URL: {url}")
        except Exception as e:
            print(f"Error processing URL {url}: {str(e)}")
    # Print the captured DateOfBirth using full name as the key after processing all URLs
    print("Captured Dates of Birth:", dob_dict)

# Use the function
extract_data_from_ids(form_recognizer_client, content_url_list)
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-avkash-chauhan.png
Full Name: AVKASH CHAUHAN CHAUHAN
Date of Birth: 1990-01-01 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-jackson.png
Full Name: James Jackson
Date of Birth: 1956-10-12 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-webb.png
Full Name: James Webb
Date of Birth: 1970-12-15 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-libby-herold.png
Full Name: Libby Herold
Date of Birth: 1996-02-10 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-radha-s-kumar.png
Full Name: Radha SKumar
Date of Birth: 1994-03-05 has confidence: 0.995
Captured Dates of Birth: {'AVKASH CHAUHAN CHAUHAN': datetime.date(1990, 1, 1), 'James Jackson': datetime.date(1956, 10, 12), 'James Webb': datetime.date(1970, 12, 15), 'Libby Herold': datetime.date(1996, 2, 10), 'Radha SKumar': datetime.date(1994, 3, 5)}

Set the flightmanifestWrite Validation fields to False¶

In [57]:
def ensure_validation_fields_false():
    # Define Azure Storage details
    account_name = ''
    account_key = ''
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)
    
    # Read data from flightmanifestwrite
    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()
    
    # Convert to pandas dataframe
    df = pd.read_csv(io.BytesIO(blob_content))
    
    # Debugging: Print current dataframe as it would appear in a CSV
    print("Before DataFrame:")
    print(df.to_string(index=False))
    
    # Iterate through columns that end with "Validation" and set to "False"
    for col in df.columns:
        if col.endswith("Validation"):
            df[col] = "False"
    
    # Debugging: Print intermediate dataframe as it would appear in a CSV
    print("\nIntermediate DataFrame after setting Validation columns to False:")
    print(df.to_string(index=False))
    
    # Removing duplicate columns
    df = df.loc[:,~df.columns.duplicated()]
    
    # Debugging: Print updated dataframe as it would appear in a CSV
    print("\nUpdated DataFrame after removing duplicate columns:")
    print(df.to_string(index=False))

    # Write the modified dataframe back to flight_manifest_write
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)  # Move the cursor to the start of the file
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("\nValidation fields have been set to False and data successfully uploaded to flightmanifestwrite")

# Call the function to execute it
ensure_validation_fields_false()
Before DataFrame:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation  LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True             False               True            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True               True            True                   False

Intermediate DataFrame after setting Validation columns to False:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth DoBValidation PersonValidation LuggageValidation NameValidation BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956         False            False             False          False                  False
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994         False            False             False          False                  False

Updated DataFrame after removing duplicate columns:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth DoBValidation PersonValidation LuggageValidation NameValidation BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956         False            False             False          False                  False
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996         False            False             False          False                  False
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994         False            False             False          False                  False

Validation fields have been set to False and data successfully uploaded to flightmanifestwrite

Validate Boarding Pass and Name on ID Card¶

In [58]:
EXPECTED_COLUMNS = [
    "Carrier", "Flight No.", "Class", "From", "To", "Date", "Baggage", 
    "Seat", "Gate", "Boarding Time", "Ticket No", "First Name", "Last Name", 
    "Sex", "DateofBirth", "DoBValidation", "PersonValidation", "LuggageValidation", 
    "NameValidation", "BoardingPassValidation"
]

def normalize_name(first_name, last_name):
    return " ".join((first_name + " " + last_name).split()).lower()

def update_name_validation(df, names_array):
    valid_names_set = set(name.lower() for name in names_array)
    updated_rows_name = 0

    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        if full_name in valid_names_set:
            df.at[index, 'NameValidation'] = True
            updated_rows_name += 1
        else:
            print(f"Failed NameValidation for: {full_name} (Row: {index + 1})")

    return df, updated_rows_name

def update_boarding_pass_validation(df, flight_info_list):
    person_info_dict = {info['Name'].lower(): info for info in flight_info_list}
    updated_rows_person = 0
    
    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        if full_name in person_info_dict:
            if str(row['Flight No.']) == str(person_info_dict[full_name]['Flight']):
                df.at[index, 'BoardingPassValidation'] = True
                updated_rows_person += 1
            else:
                print(f"Failed BoardingPassValidation (flight mismatch) for: {full_name} (Row: {index + 1})")
        else:
            print(f"Failed BoardingPassValidation (name not found) for: {full_name} (Row: {index + 1})")

    return df, updated_rows_person

def update_flight_manifest(names_array, flight_info_list):
    try:
        account_name = ''
        account_key = ''
        blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)

        read_container_name = 'flightmanifestwrite'
        read_blob_name = 'FlightManifest.csv'
        blob_client = blob_service_client.get_blob_client(container=read_container_name, blob=read_blob_name)
        blob_data = blob_client.download_blob()
        blob_content = blob_data.readall()
        df = pd.read_csv(io.BytesIO(blob_content))

        # Normalize column names to remove leading and trailing spaces
        df.columns = [col.strip() for col in df.columns]
        
        # Debugging Step: Print the dataframe columns
        print(f"Normalized columns in the data: {df.columns.tolist()}")

        unexpected_columns = set(df.columns) - set(EXPECTED_COLUMNS)
        missing_columns = set(EXPECTED_COLUMNS) - set(df.columns)

        if unexpected_columns:
            print(f"Unexpected columns: {list(unexpected_columns)}")
        if missing_columns:
            print(f"Missing columns: {list(missing_columns)}")
        if unexpected_columns or missing_columns:
            raise ValueError("Column mismatch detected in the data!")

        df, updated_names = update_name_validation(df, names_array)
        df, updated_boarding_passes = update_boarding_pass_validation(df, flight_info_list)

        print(f"Total Rows Updated for NameValidation: {updated_names}")
        print(f"Total Rows Updated for BoardingPassValidation: {updated_boarding_passes}")

        output_stream = io.StringIO()
        df.to_csv(output_stream, index=False)
        output_stream.seek(0)
        blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

        # Print updated dataframe
        print("Updated FlightManifest.csv:")
        print(df.head())
        print("Data successfully uploaded to flightmanifestwrite")

    except Exception as e:
        print(f"An error occurred: {e}")
# Sample call
update_flight_manifest(names_array, flight_info_list)
Normalized columns in the data: ['Carrier', 'Flight No.', 'Class', 'From', 'To', 'Date', 'Baggage', 'Seat', 'Gate', 'Boarding Time', 'Ticket No', 'First Name', 'Last Name', 'Sex', 'DateofBirth', 'DoBValidation', 'PersonValidation', 'LuggageValidation', 'NameValidation', 'BoardingPassValidation']
Total Rows Updated for NameValidation: 5
Total Rows Updated for BoardingPassValidation: 5
Updated FlightManifest.csv:
  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   
1     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M   
2      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M   
3      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F   
4     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F   

        DateofBirth  DoBValidation  PersonValidation  LuggageValidation  \
0    1 January 1990          False             False              False   
1  15 December 1970          False             False              False   
2   12 October 1956          False             False              False   
3  10 February 1996          False             False              False   
4      5 March 1994          False             False              False   

   NameValidation  BoardingPassValidation  
0            True                    True  
1            True                    True  
2            True                    True  
3            True                    True  
4            True                    True  
Data successfully uploaded to flightmanifestwrite

Input PersonValidation, normalize data¶

In [59]:
def normalize_name(name):
    # Convert to uppercase
    name = name.upper()
    
    # Special case for "SKUMAR"
    if "SKUMAR" in name:
        name = name.replace("SKUMAR", "S KUMAR")
    
    # Remove repeated names in succession
    words = name.split()
    new_words = []
    for i, word in enumerate(words):
        # If current word is same as previous, skip
        if i > 0 and word == words[i-1]:
            continue
        new_words.append(word)
    
    # Check last word for the two uppercase condition and not fully uppercase
    last_word = new_words[-1]
    if len(last_word) > 1 and last_word[0].isupper() and last_word[1].isupper() and not last_word.isupper():
        new_words[-1] = last_word[0]  # Set the last word to initial
        new_words.append(last_word[1:])  # Append the rest as new word
    
    return ' '.join(new_words)

def normalize_dict_names(names_dict):
    normalized_names_dict = {}
    for name, value in names_dict.items():
        normalized_name = normalize_name(name)
        normalized_names_dict[normalized_name] = value
    return normalized_names_dict

# valid_names_dict = {
#    'AVKASH CHAUHAN CHAUHAN': True, 'James Jackson': True, 
#    'James Webb': True, 'Libby Herold': True, 'Radha SKumar': True//
# }

normalized_names_dict = normalize_dict_names(valid_names_dict)
print(normalized_names_dict)
{'AVKASH CHAUHAN': True, 'JAMES JACKSON': True, 'JAMES WEBB': True, 'LIBBY HEROLD': True, 'RADHA S KUMAR': True}
In [60]:
EXPECTED_COLUMNS = [
    "Carrier", "Flight No.", "Class", "From", "To", "Date", "Baggage", 
    "Seat", "Gate", "Boarding Time", "Ticket No", "First Name", "Last Name", 
    "Sex", "DateofBirth", "DoBValidation", "PersonValidation", "LuggageValidation", 
    "NameValidation", "BoardingPassValidation"
]

def normalize_name(first_name, last_name):
    # Combine, split, and join to normalize name
    name = " ".join((first_name + " " + last_name).upper().split())
    return name

def update_person_validation(df, valid_names_dict):
    updated_rows = 0
    # Create a dictionary of normalized names from the valid_names_dict
    normalized_valid_names = {normalize_name(name.split()[0], " ".join(name.split()[1:])): True for name in valid_names_dict.keys()}
    for index, row in df.iterrows():
        full_name_normalized = normalize_name(row['First Name'], row['Last Name'])
        if full_name_normalized in normalized_valid_names:
            if normalized_valid_names[full_name_normalized]:
                df.at[index, 'PersonValidation'] = True
                updated_rows += 1
            else:
                df.at[index, 'PersonValidation'] = False
    return df, updated_rows

def update_flight_manifest(valid_names_dict):
    try:
        account_name = ''
        account_key = ''
        blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)

        read_container_name = 'flightmanifestwrite'
        read_blob_name = 'FlightManifest.csv'
        blob_client = blob_service_client.get_blob_client(container=read_container_name, blob=read_blob_name)
        blob_data = blob_client.download_blob()
        blob_content = blob_data.readall()
        df = pd.read_csv(io.BytesIO(blob_content))

        # Normalize column names to remove leading and trailing spaces
        df.columns = [col.strip() for col in df.columns]
        
        unexpected_columns = set(df.columns) - set(EXPECTED_COLUMNS)
        missing_columns = set(EXPECTED_COLUMNS) - set(df.columns)

        if unexpected_columns:
            print(f"Unexpected columns: {list(unexpected_columns)}")
        if missing_columns:
            print(f"Missing columns: {list(missing_columns)}")
        if unexpected_columns or missing_columns:
            raise ValueError("Column mismatch detected in the data!")

        df, updated_persons = update_person_validation(df, valid_names_dict)
        print(f"Total Rows Updated for PersonValidation: {updated_persons}")

        output_stream = io.StringIO()
        df.to_csv(output_stream, index=False)
        output_stream.seek(0)
        blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

        print("Updated FlightManifest.csv:")
        print(df.head())
        print("Data successfully uploaded to flightmanifestwrite")

    except Exception as e:
        print(f"An error occurred: {e}")


# Sample call
update_flight_manifest(normalized_names_dict)
Total Rows Updated for PersonValidation: 5
Updated FlightManifest.csv:
  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   
1     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M   
2      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M   
3      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F   
4     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F   

        DateofBirth  DoBValidation  PersonValidation  LuggageValidation  \
0    1 January 1990          False              True              False   
1  15 December 1970          False              True              False   
2   12 October 1956          False              True              False   
3  10 February 1996          False              True              False   
4      5 March 1994          False              True              False   

   NameValidation  BoardingPassValidation  
0            True                    True  
1            True                    True  
2            True                    True  
3            True                    True  
4            True                    True  
Data successfully uploaded to flightmanifestwrite

Validate DOB¶

In [61]:
import datetime

def normalize_name(name):
    # Convert to uppercase
    name = name.upper()
    
    # Special case for "SKUMAR"
    if "SKUMAR" in name:
        name = name.replace("SKUMAR", "S KUMAR")
    
    # Remove repeated names in succession
    words = name.split()
    new_words = []
    for i, word in enumerate(words):
        # If current word is same as previous, skip
        if i > 0 and word == words[i-1]:
            continue
        new_words.append(word)
    
    # Check last word for the two uppercase condition and not fully uppercase
    last_word = new_words[-1]
    if len(last_word) > 1 and last_word[0].isupper() and last_word[1].isupper() and not last_word.isupper():
        new_words[-1] = last_word[0]  # Set the last word to initial
        new_words.append(last_word[1:])  # Append the rest as new word
    
    return ' '.join(new_words).lower()  # Convert the normalized name to lowercase

def normalize_dict_names(names_dict):
    normalized_names_dict = {}
    for name, value in names_dict.items():
        normalized_name = normalize_name(name)
        normalized_names_dict[normalized_name] = value
    return normalized_names_dict

#dob_dict ={
#    'AVKASH CHAUHAN CHAUHAN': datetime.date(1990, 1, 1), 
#    'James Jackson': datetime.date(1956, 10, 12), 
#    'James Webb': datetime.date(1970, 12, 15), 
#    'Libby Herold': datetime.date(1996, 2, 10), 
#    'Radha SKumar': datetime.date(1994, 3, 5)
#}

normalized_dob_dict = normalize_dict_names(dob_dict)
print(normalized_dob_dict)
{'avkash chauhan': datetime.date(1990, 1, 1), 'james jackson': datetime.date(1956, 10, 12), 'james webb': datetime.date(1970, 12, 15), 'libby herold': datetime.date(1996, 2, 10), 'radha s kumar': datetime.date(1994, 3, 5)}
In [62]:
def normalize_name(first_name, last_name):
    return " ".join((first_name + " " + last_name).split()).lower()

def update_dob_validation(df, dob_dict):
    print(df.head())  # Print a sample of the dataframe before the update

    
    updated_rows_dob = 0
    normalized_dob_dict = {normalize_name(" ".join(key.split()[:-1]), key.split()[-1]): value for key, value in dob_dict.items()}

    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        dob_str = row['DateofBirth']

        if full_name in normalized_dob_dict:
            dob_from_csv = datetime.datetime.strptime(dob_str, '%d %B %Y').date()
            if dob_from_csv == normalized_dob_dict[full_name]:
                df.at[index, 'DoBValidation'] = "TRUE"
                updated_rows_dob += 1

    return df, updated_rows_dob

def update_flight_manifest():
    account_name = ''
    account_key = ''
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)

    read_container_name = 'flightmanifestwrite'
    read_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=read_container_name, blob=read_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()

    df = pd.read_csv(io.BytesIO(blob_content))

#    dob_dict = {
#    'avkash chauhan': datetime.date(1990, 1, 1),
#    'james jackson': datetime.date(1956, 10, 12),
#    'james webb': datetime.date(1970, 12, 15),
#    'libby herold': datetime.date(1996, 2, 10),
#    'radha s kumar': datetime.date(1994, 3, 5)
#    }
    
    
    df, updated_dobs = update_dob_validation(df, normalized_dob_dict)  # DoB validation
    print(df.head())  # Print a sample of the dataframe before the update

    print(f"Total Rows Updated for DoBValidation: {updated_dobs}")

    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("Data successfully uploaded to flightmanifestwrite")

update_flight_manifest()
  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   
1     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M   
2      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M   
3      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F   
4     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F   

        DateofBirth  DoBValidation  PersonValidation  LuggageValidation  \
0    1 January 1990          False              True              False   
1  15 December 1970          False              True              False   
2   12 October 1956          False              True              False   
3  10 February 1996          False              True              False   
4      5 March 1994          False              True              False   

   NameValidation  BoardingPassValidation  
0            True                    True  
1            True                    True  
2            True                    True  
3            True                    True  
4            True                    True  
  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   
1     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M   
2      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M   
3      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F   
4     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F   

        DateofBirth DoBValidation  PersonValidation  LuggageValidation  \
0    1 January 1990          TRUE              True              False   
1  15 December 1970          TRUE              True              False   
2   12 October 1956          TRUE              True              False   
3  10 February 1996          TRUE              True              False   
4      5 March 1994          TRUE              True              False   

   NameValidation  BoardingPassValidation  
0            True                    True  
1            True                    True  
2            True                    True  
3            True                    True  
4            True                    True  
Total Rows Updated for DoBValidation: 5
Data successfully uploaded to flightmanifestwrite

Set James Web to have a Lighter as aTest case¶

In [63]:
def set_luggage_validation():
    # Define Azure Storage details
    account_name = ''
    account_key = ''
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)
    
    # Read data from flightmanifestwrite
    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()
    
    # Convert to pandas dataframe
    df = pd.read_csv(io.BytesIO(blob_content))

    # Debugging: Print current dataframe as it would appear in a CSV
    print("Before DataFrame:")
    print(df.to_string(index=False))

    # Set LuggageValidation to TRUE for all rows
    df['LuggageValidation'] = "True"

    # Set LuggageValidation to FALSE for James Webb
    df.loc[(df['First Name'] == 'James') & (df['Last Name'] == 'Webb'), 'LuggageValidation'] = "False"

    # Debugging: Print updated dataframe as it would appear in a CSV
    print("\nUpdated DataFrame after setting LuggageValidation:")
    print(df.to_string(index=False))

    # Write the modified dataframe back to flight_manifest_write
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)  # Move the cursor to the start of the file
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("\nLuggageValidation has been updated and data successfully uploaded to flightmanifestwrite")

# Call the function to execute it
set_luggage_validation()
Before DataFrame:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation  LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True              True              False            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True              False            True                    True

Updated DataFrame after setting LuggageValidation:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True              True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True             False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True              True              True            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True              True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True              True            True                    True

LuggageValidation has been updated and data successfully uploaded to flightmanifestwrite

Set Kumar to False to Test negative case on Boarding Validation¶

In [64]:
def set_boarding_pass_validation():
    # Define Azure Storage details
    account_name = ''
    account_key = ''
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)
    
    # Read data from flightmanifestwrite
    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()
    
    # Convert to pandas dataframe
    df = pd.read_csv(io.BytesIO(blob_content))

    # Debugging: Print current dataframe as it would appear in a CSV
    print("Before DataFrame:")
    print(df.to_string(index=False))

    # Set BoardingPassValidation to FALSE for Radha S Kumar
    df.loc[(df['First Name'] == 'Radha S') & (df['Last Name'] == 'Kumar'), 'BoardingPassValidation'] = "False"

    # Debugging: Print updated dataframe as it would appear in a CSV
    print("\nUpdated DataFrame after setting BoardingPassValidation:")
    print(df.to_string(index=False))

    # Write the modified dataframe back to flight_manifest_write
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)  # Move the cursor to the start of the file
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("\nBoardingPassValidation has been updated and data successfully uploaded to flightmanifestwrite")

# Call the function to execute it
set_boarding_pass_validation()
Before DataFrame:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation  LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True              True               True            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True               True            True                    True

Updated DataFrame after setting BoardingPassValidation:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation  LuggageValidation  NameValidation BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True               True            True                   True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True              False            True                   True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True              True               True            True                   True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True               True            True                   True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True               True            True                  False

BoardingPassValidation has been updated and data successfully uploaded to flightmanifestwrite

Set PersonValidation to FALSE for James Jackson, as a TEST case¶

In [65]:
def set_person_validation():
    # Define Azure Storage details
    account_name = ''
    account_key = ''
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)
    
    # Read data from flightmanifestwrite
    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()
    
    # Convert to pandas dataframe
    df = pd.read_csv(io.BytesIO(blob_content))

    # Debugging: Print current dataframe as it would appear in a CSV
    print("Before DataFrame:")
    print(df.to_string(index=False))

    # Set PersonValidation to FALSE for James Jackson
    df.loc[(df['First Name'] == 'James') & (df['Last Name'] == 'Jackson'), 'PersonValidation'] = "False"

    # Debugging: Print updated dataframe as it would appear in a CSV
    print("\nUpdated DataFrame after setting PersonValidation:")
    print(df.to_string(index=False))

    # Write the modified dataframe back to flight_manifest_write
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)  # Move the cursor to the start of the file
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("\nPersonValidation has been updated and data successfully uploaded to flightmanifestwrite")

# Call the function to execute it
set_person_validation()
Before DataFrame:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation  PersonValidation  LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True              True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True              True               True            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True              True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True              True               True            True                   False

Updated DataFrame after setting PersonValidation:
Carrier  Flight No.    Class          From      To           Date Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex      DateofBirth  DoBValidation PersonValidation  LuggageValidation  NameValidation  BoardingPassValidation
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   1 January 1990           True             True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M 15 December 1970           True             True              False            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M  12 October 1956           True            False               True            True                    True
     UA         234 Business San Francisco Chicago April 20, 2022      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F 10 February 1996           True             True               True            True                    True
     UA         234  Economy San Francisco Chicago April 20, 2022     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F     5 March 1994           True             True               True            True                   False

PersonValidation has been updated and data successfully uploaded to flightmanifestwrite

Print Results¶

In [66]:
account_name = ''
account_key = ''
container_name = 'flightmanifestwrite'
blob_name = 'FlightManifest.csv'

def get_updated_manifest_from_blob(account_name, account_key, container_name, blob_name):
    from azure.storage.blob import BlobServiceClient
    from io import StringIO

    # Constructing the connection string
    conn_str = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
    blob_service_client = BlobServiceClient.from_connection_string(conn_str)

    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

    # Download the blob content to a string
    data = blob_client.download_blob().content_as_text()
    df = pd.read_csv(StringIO(data))
    
    # Ensure columns are cleaned of any leading/trailing spaces
    df.columns = df.columns.str.strip()

    # Convert to boolean for easier comparison
    for col in ['DoBValidation', 'NameValidation', 'BoardingPassValidation', 'LuggageValidation', 'PersonValidation']:
        if col in df.columns:
            series = df[col].astype(str).str.strip().str.upper()
            df[col] = series == "TRUE"
        else:
            print(f"Warning: Column {col} not found in DataFrame.")
    return df

df = get_updated_manifest_from_blob(account_name, account_key, container_name, blob_name)

for index, row in df.iterrows():
    print("------------------------------------------------------------")
    if not row['PersonValidation']:
        print(f"Dear Mr. {row['First Name']} {row['Last Name']},")
        print(f"You are welcome to flight # {row['Flight No.']} leaving at {row['Boarding Time']} from {row['From']} to {row['To']}.")
        print(f"Your seat number is {row['Seat']}, and it is confirmed.")
        print(f"We did not find a prohibited item (lighter) in your carry-on baggage.")
        print("Thanks for following the procedure.")
        print("Your identity could not be verified. Please see a customer service representative.")
    else:
        if row['DoBValidation'] and row.get('NameValidation', False) and row.get('BoardingPassValidation', False):
            print(f"Dear Mr. {row['First Name']} {row['Last Name']},")
            print(f"You are welcome to flight # {row['Flight No.']} leaving at {row['Boarding Time']} from {row['From']} to {row['To']}.")
            print(f"Your seat number is {row['Seat']}, and it is confirmed.")
            if not row['LuggageValidation']:
                print(f"We have found a prohibited item in your carry-on baggage, and it is flagged for removal.")
                print("Your identity is verified. However, your baggage verification failed, so please see a customer service representative.")
            else:
                print(f"We did not find a prohibited item (lighter) in your carry-on baggage,")
                print(f"thanks for following the procedure.")
                print("Your identity is verified so please board the plane.")
        elif row['DoBValidation'] and row.get('NameValidation', False) and not row.get('BoardingPassValidation', False):
            print(f"Dear Sir/Madam,")
            print(f"Your boarding pass for flight # {row['Flight No.']} leaving at {row['Boarding Time']} from {row['From']} to {row['To']} has not been validated.")
            print(f"Please visit our customer service desk for further assistance.")
        else:
            print(f"Dear Sir/Madam,")
            print(f"There seems to be an issue with your credentials for flight # {row['Flight No.']} leaving at {row['Boarding Time']} from {row['From']} to {row['To']}.")
            print(f"Please visit our customer service desk for further assistance.")
    print("\n")
print("------------------------------------------------------------")
------------------------------------------------------------
Dear Mr. Avkash Chauhan,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 20A, and it is confirmed.
We did not find a prohibited item (lighter) in your carry-on baggage,
thanks for following the procedure.
Your identity is verified so please board the plane.


------------------------------------------------------------
Dear Mr. James Webb,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 1A, and it is confirmed.
We have found a prohibited item in your carry-on baggage, and it is flagged for removal.
Your identity is verified. However, your baggage verification failed, so please see a customer service representative.


------------------------------------------------------------
Dear Mr. James Jackson,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 25B, and it is confirmed.
We did not find a prohibited item (lighter) in your carry-on baggage.
Thanks for following the procedure.
Your identity could not be verified. Please see a customer service representative.


------------------------------------------------------------
Dear Mr.  Libby Herold,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 3D, and it is confirmed.
We did not find a prohibited item (lighter) in your carry-on baggage,
thanks for following the procedure.
Your identity is verified so please board the plane.


------------------------------------------------------------
Dear Sir/Madam,
Your boarding pass for flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago has not been validated.
Please visit our customer service desk for further assistance.


------------------------------------------------------------

Resources¶

  • https://docs.microsoft.com/en-us/samples/azure/azure-sdk-for-python/formrecognizer-samples/
In [ ]:
 
In [ ]: